By:
Claudia Vo
Codey Phoun
Sudhin Domala
Â
For:
SJSU CS286 Project
Dr. Andreopoulos
Spring 2021
Â
Sample abbreviations
HQE = normal media exponential growth (control)
HQ10E = normal media + added 10% produced water (intermediate treatment)
PWE = 100% produced water exponential (High treatment)
HQ_ST = normal media stationary growth (control)
PW_ST = 100% produced water stationary (treatment)
Import libraries
library(readr)
library(edgeR)
## Loading required package: limma
library(limma)
library(pheatmap)
Gene count data was previously created with STAR
Read in the count data as a data frame
URL = "https://raw.githubusercontent.com/codey-phoun/pw_oilfield/main/STAR_results/STAR_counts.txt"
star_data = read_tsv(URL)
##
## -- Column specification --------------------------------------------------------
## cols(
## gene_name = col_character(),
## HQ10E1 = col_double(),
## HQ10E2 = col_double(),
## HQE1 = col_double(),
## HQE2 = col_double(),
## HQ_ST1 = col_double(),
## HQ_ST2 = col_double(),
## PWE1 = col_double(),
## PWE2 = col_double(),
## PW_ST1 = col_double(),
## PW_ST2 = col_double()
## )
Create count matrix
star_data = as.data.frame(star_data)
# Set row names to be the gene name and remove gene_name column
rownames(star_data) <- star_data$gene_name
star_data[1] <- NULL
head(star_data)
Create a DGE list object
group <- rep(c("HQ10E","HQE","HQ_ST","PWE","PW_ST"),
each = 2)
dge <- DGEList(counts = star_data,
group = group) #creates a DGE list object
dim(dge)
## [1] 12392 10
full_dge <- dge #store original data just in case
Sample library sizes
apply(dge$counts, 2, sum) # sum across columns/samples for each gene
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2
## 19654912 22355112 22447786 16151357 15135335 18504345 18680923 18905705
## PW_ST1 PW_ST2
## 17778297 21967279
Counts per million (cpm)
head(cpm(dge))
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2
## Phatr3_J31400 0.05087787 0.2236625 0.1336435 0.4334001 0.00000 0.2161654
## Phatr3_J42422 7.32641286 7.6939896 9.5332341 9.4728883 26.56036 24.3726541
## Phatr3_J31402 0.00000000 0.0000000 0.0000000 0.0000000 0.00000 0.0000000
## Phatr3_J42423 1.37370241 1.9682299 3.4301824 2.6004007 36.80130 27.4530117
## Phatr3_J42424 92.54684020 97.1589854 83.5717162 77.6405351 100.69153 99.4901468
## Phatr3_J7430 23.14942952 22.8135739 27.6196503 27.4280359 30.85495 27.7232185
## PWE1 PWE2 PW_ST1 PW_ST2
## Phatr3_J31400 0.1070611 0.05289409 0.5624836 0.910445
## Phatr3_J42422 6.2630738 5.34230276 11.4184165 11.517130
## Phatr3_J31402 0.0000000 0.00000000 0.0000000 0.000000
## Phatr3_J42423 2.4624051 2.06286938 6.1873193 13.838764
## Phatr3_J42424 81.2593682 79.76428279 58.2170497 71.105757
## Phatr3_J7430 17.8256717 24.11970355 18.9556964 16.570100
Keep only 100 counts per million in at least 2 samples
keep <- rowSums(cpm(dge)>100) >= 2
table(keep)
## keep
## FALSE TRUE
## 9141 3251
dge <- dge[keep,]
dim(dge) #check number of genes left after filtering
## [1] 3251 10
12392 genes are filtered down to 3251 genes
Reset the library size
dge$samples$lib.size <- colSums(dge$counts)
dge$samples
Library sizes before filtering:
apply(full_dge$counts, 2, sum)
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2
## 19654912 22355112 22447786 16151357 15135335 18504345 18680923 18905705
## PW_ST1 PW_ST2
## 17778297 21967279
Library sizes after filtering:
apply(dge$counts, 2, sum)
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2
## 15289361 17319367 17111487 12568283 11248709 13869364 14606604 14925823
## PW_ST1 PW_ST2
## 14619994 17756624
Normalize data by the trimmed mean of M-values (TMM) method proposed by Robinson and Oshlack (2010)
dge_norm=calcNormFactors(dge, method="TMM")
dge_norm
## An object of class "DGEList"
## $counts
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2 PW_ST1 PW_ST2
## Phatr3_J42426 8520 9594 8689 7289 5396 7052 6769 7244 5813 8494
## Phatr3_EG02408 3806 4483 3948 2637 224 238 3580 3084 216 368
## Phatr3_J31409 6448 7590 9257 5980 1008 1394 4863 4622 1381 1864
## Phatr3_J42429 1747 1981 2134 1377 2150 2386 1650 1699 1427 1398
## Phatr3_J4937 6608 7884 7049 4583 3737 5446 6837 6271 2997 3700
## 3246 more rows ...
##
## $samples
## group lib.size norm.factors
## HQ10E1 HQ10E 15289361 1.0347646
## HQ10E2 HQ10E 17319367 1.0442405
## HQE1 HQE 17111487 1.0884649
## HQE2 HQE 12568283 1.0460172
## HQ_ST1 HQ_ST 11248709 1.0977999
## HQ_ST2 HQ_ST 13869364 1.0696829
## PWE1 PWE 14606604 1.0251548
## PWE2 PWE 14925823 1.0196206
## PW_ST1 PW_ST 14619994 0.8029116
## PW_ST2 PW_ST 17756624 0.8247657
Multidimensional scaling plot to look at the inter-sample relationship by biological coefficient of variation (BCV) distance
colors_mds <- c("red", "palevioletred4", "darkorange", "blue", "deepskyblue4")
plotMDS(dge_norm, method="bcv", col=rep(colors_mds,each=2), pch = rep(c(0,7,1,15,19),each=2), cex = 1.75 )
legend("bottomleft",as.character(unique(dge_norm$samples$group)),col=colors_mds,pch=c(0,7,1,15,19), ncol=2)
Exponential growth samples cluster together stronger than stationary growth
Samples do not tend to cluster based on growth medium
Â
Create the design matrix
design.mat <- model.matrix(~ 0 + dge_norm$samples$group)
colnames(design.mat) <- levels(dge_norm$samples$group)
Estimate the dispersion with Cox-Reid profile-adjusted likelihood (CR) method in estimating dispersions with Generalized linear models (GLMs)
d2 <- estimateGLMCommonDisp(dge_norm,design.mat)
d2 <- estimateGLMTrendedDisp(d2,design.mat, method="auto")
# You can change method to "auto", "bin.spline", "power", "spline", "bin.loess".
# The default is "auto" which chooses "bin.spline" when > 200 tags and "power" otherwise.
d2 <- estimateGLMTagwiseDisp(d2,design.mat)
plotBCV(d2)
Calculate log2 CPM values
logcpm <- cpm(d2, log = TRUE)
Create a matrix of contrasts for anova-like testing (since data has different conditions that we want to compare)
ANOVA tests for DEGs between any set of groups with the null hypothesis that the mean gene expression is equal across all groups.
my_contrasts<-makeContrasts(
HQst_vs_PWst = HQ_ST-PW_ST, #PW vs normal stationary growth samples
HQE_vs_PWE = HQE-PWE, # PW vs normal exponential growth samples,
HQST_vs_HQE = HQ_ST-HQE, #normal exponential vs normal stationary
PWE_vs_PWST = PWE-PW_ST, #100% exponential vs 100% stationary PW samples
levels= design.mat
)
my_contrasts
## Contrasts
## Levels HQst_vs_PWst HQE_vs_PWE HQST_vs_HQE PWE_vs_PWST
## HQ_ST 1 0 1 0
## HQ10E 0 0 0 0
## HQE 0 1 -1 0
## PW_ST -1 0 0 -1
## PWE 0 -1 0 1
Fit a quasi-likelihood negative binomial generalized log-linear model to count data
fit <- glmQLFit(d2, design.mat)
HQst_vs_PWst <- glmQLFTest(fit, contrast = my_contrasts[,"HQst_vs_PWst"])
# top 50 DEGs by lowest adjusted p-values
HQst_vs_PWst_top50 <- topTags(HQst_vs_PWst,adjust.method = "BH", p.value = 0.05, n = 50)
HQst_vs_PWst_all <- topTags(HQst_vs_PWst,adjust.method = "BH", p.value = 0.05, n = nrow(HQst_vs_PWst$table))
HQst_vs_PWst_DEG <- HQst_vs_PWst_all[abs(HQst_vs_PWst_all$table$logFC) > 1, ]
HQst_vs_PWst_up <- HQst_vs_PWst_all[HQst_vs_PWst_all$table$logFC > 1, ]
HQst_vs_PWst_down <- HQst_vs_PWst_all[HQst_vs_PWst_all$table$logFC < -1, ]
head(HQst_vs_PWst_top50, n=10)
## Coefficient: 1*HQ_ST -1*PW_ST
## logFC logCPM F PValue FDR
## Phatr3_J47572 6.861896 10.355303 830.2771 7.879261e-10 1.438429e-06
## Phatr3_J50055 -4.296119 6.983754 711.6102 1.515469e-09 1.438429e-06
## Phatr3_J55010 5.536678 9.496214 633.6890 2.476507e-09 1.438429e-06
## Phatr3_J45193 -3.819702 9.779135 619.5537 2.724555e-09 1.438429e-06
## Phatr3_EG00333 5.960957 8.190521 614.3864 2.822823e-09 1.438429e-06
## Phatr3_J49202 -6.373400 12.846728 609.1734 2.926430e-09 1.438429e-06
## Phatr3_J10640 -10.070021 8.076858 601.0593 3.097202e-09 1.438429e-06
## Phatr3_J32747 4.480649 8.734087 537.9812 4.948332e-09 1.826148e-06
## Phatr3_J48882 -10.564538 13.377617 533.0443 5.144764e-09 1.826148e-06
## Phatr3_EG00065 -4.133901 9.136149 522.0666 5.617190e-09 1.826148e-06
Determine how many genes are up and down regulated for each pairwise comparison
For differentially expressed genes set logFC and pvalue threshold to 0.05 and 1
lfc=1 sets a 2-fold change minimum
is.de <- decideTestsDGE(HQst_vs_PWst,adjust.method="BH",p.value=0.05,lfc=1)
summary(is.de)
## 1*HQ_ST -1*PW_ST
## Down 509
## NotSig 2515
## Up 227
plotMD(HQst_vs_PWst, status=is.de, values=c(1,-1), col=c("maroon","cadetblue3"),
legend="topright", cex = .5, main = "MD Plot : HQ ST vs PW ST")
Volcano Plot
volcanoData <- cbind(HQst_vs_PWst_all$table$logFC, -log10(HQst_vs_PWst_all$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
DEGs <- HQst_vs_PWst_all$table$FDR < 0.05 & abs(HQst_vs_PWst_all$table$logFC) > 1
point.col <- ifelse(DEGs, "red", "black")
plot(volcanoData, pch=19, cex = .5, col = point.col, main = "Volcano Plot : HQ ST vs PW ST")
HQst_vs_PWst_top50_log2_cpm <- logcpm[rownames(HQst_vs_PWst_top50$table),]
pheatmap(subset(HQst_vs_PWst_top50_log2_cpm,select=c(HQ_ST1,HQ_ST2,PW_ST1,PW_ST2)),
color=colorRampPalette(c("navy", "lavender", "maroon"))(15),fontsize_row=4)
HQE_vs_PWE <- glmQLFTest(fit, contrast = my_contrasts[,"HQE_vs_PWE"])
HQE_vs_PWE_top50 = topTags(HQE_vs_PWE,adjust.method = "BH", p.value = 0.05, n = 50)
HQE_vs_PWE_all = topTags(HQE_vs_PWE,adjust.method = "BH", p.value = 0.05, n = nrow(HQE_vs_PWE$table))
HQE_vs_PWE_DEG <- HQE_vs_PWE_all[abs(HQE_vs_PWE_all$table$logFC) > 1, ]
HQE_vs_PWE_up <- HQE_vs_PWE_all[HQE_vs_PWE_all$table$logFC > 1, ]
HQE_vs_PWE_down <- HQE_vs_PWE_all[HQE_vs_PWE_all$table$logFC < -1, ]
head(HQE_vs_PWE_top50, n=10)
## Coefficient: 1*HQE -1*PWE
## logFC logCPM F PValue FDR
## Phatr3_J34132 -8.609962 8.008770 727.2847 1.381801e-09 4.492234e-06
## Phatr3_J55010 -5.111485 9.496214 561.2096 4.139153e-09 6.323839e-06
## Phatr3_J49151 3.607201 10.200579 498.9501 6.800095e-09 6.323839e-06
## Phatr3_EG00333 -5.001485 8.190521 483.2628 7.780792e-09 6.323839e-06
## Phatr3_J15393 -5.145494 7.119785 417.1914 1.445075e-08 9.395875e-06
## Phatr3_J31433 -3.291023 10.002898 296.6263 6.035273e-08 3.270112e-05
## Phatr3_EG02360 6.246511 9.360258 268.4197 9.154341e-08 4.251538e-05
## Phatr3_J50500 -2.371713 9.723243 241.8646 1.411624e-07 5.619997e-05
## Phatr3_J51092 -3.844156 7.877635 236.2610 1.555828e-07 5.619997e-05
## Phatr3_J31619 -1.785251 7.409644 217.5707 2.188898e-07 7.116106e-05
Determine how many genes are up and down regulated for each pairwise comparison
For differentially expressed genes set logFC and pvalue threshold to 0.05 and 1
lfc=1 sets a 2-fold change minimum
is.de <- decideTestsDGE(HQE_vs_PWE,adjust.method="BH",p.value=0.05,lfc=1)
summary(is.de)
## 1*HQE -1*PWE
## Down 223
## NotSig 2926
## Up 102
plotMD(HQE_vs_PWE, status=is.de, values=c(1,-1), col=c("maroon","cadetblue3"),
legend="topright", cex = .5, main = "MD Plot : HQ E vs PW E")
Volcano Plot
volcanoData <- cbind(HQE_vs_PWE_all$table$logFC, -log10(HQE_vs_PWE_all$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
DEGs <- HQE_vs_PWE_all$table$FDR < 0.05 & abs(HQE_vs_PWE_all$table$logFC) > 1
point.col <- ifelse(DEGs, "red", "black")
plot(volcanoData, pch=19, cex = .5, col = point.col, main = "Volcano Plot : HQ E vs PW E")
HQE_vs_PWE_top50_log2_cpm <- logcpm[rownames(HQE_vs_PWE_top50$table),]
pheatmap(subset(HQE_vs_PWE_top50_log2_cpm,select=c(HQE1,HQE2,PWE1,PWE2)),
color=colorRampPalette(c("navy", "lavender", "maroon"))(15),fontsize_row=4)
HQST_vs_HQE <- glmQLFTest(fit, contrast = my_contrasts[,"HQST_vs_HQE"])
HQST_vs_HQE_top50 = topTags(HQST_vs_HQE,adjust.method = "BH", p.value = 0.05, n = 50)
HQST_vs_HQE_all = topTags(HQST_vs_HQE,adjust.method = "BH", p.value = 0.05, n = nrow(HQST_vs_HQE$table))
HQST_vs_HQE_DEG <- HQST_vs_HQE_all[abs(HQST_vs_HQE_all$table$logFC) > 1, ]
HQST_vs_HQE_up <- HQST_vs_HQE_all[HQST_vs_HQE_all$table$logFC > 1, ]
HQST_vs_HQE_down <- HQST_vs_HQE_all[HQST_vs_HQE_all$table$logFC < -1, ]
head(HQST_vs_HQE_top50, n=10)
## Coefficient: 1*HQ_ST -1*HQE
## logFC logCPM F PValue FDR
## Phatr3_J23830 6.286260 8.103721 1546.1159 5.594732e-11 1.818847e-07
## Phatr3_J47667 11.239932 13.420073 1249.2782 1.387318e-10 2.014103e-07
## Phatr3_J40433 7.752019 14.655511 1162.2726 1.886329e-10 2.014103e-07
## Phatr3_J15126 -11.632476 6.667419 971.6138 4.041445e-10 2.014103e-07
## Phatr3_J40467 -5.679432 7.803590 965.0569 4.159431e-10 2.014103e-07
## Phatr3_J48511 4.590450 9.653790 930.4874 4.856776e-10 2.014103e-07
## Phatr3_J46796 9.157803 9.775273 924.0527 5.002087e-10 2.014103e-07
## Phatr3_J46395 -6.934256 8.292089 885.7125 5.988517e-10 2.014103e-07
## Phatr3_J10068 -6.734953 6.486422 878.3977 6.203162e-10 2.014103e-07
## Phatr3_J48834 4.924547 6.469906 861.1280 6.748816e-10 2.014103e-07
Determine how many genes are up and down regulated for each pairwise comparison
For differentially expressed genes set logFC and pvalue threshold to 0.05 and 1
lfc=1 sets a 2-fold change minimum
is.de <- decideTestsDGE(HQST_vs_HQE,adjust.method="BH",p.value=0.05,lfc=1)
summary(is.de)
## 1*HQ_ST -1*HQE
## Down 765
## NotSig 1734
## Up 752
plotMD(HQST_vs_HQE, status=is.de, values=c(1,-1), col=c("maroon","cadetblue3"),
legend="topright", cex = .5, main = "MD Plot : HQ St vs HQ E")
Volcano Plot
volcanoData <- cbind(HQST_vs_HQE_all$table$logFC, -log10(HQST_vs_HQE_all$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
DEGs <- HQST_vs_HQE_all$table$FDR < 0.05 & abs(HQST_vs_HQE_all$table$logFC) > 1
point.col <- ifelse(DEGs, "red", "black")
plot(volcanoData, pch=19, cex = .5, col = point.col, main = "Volcano Plot : HQ St vs HQ E")
HQST_vs_HQE_top50_log2_cpm <- logcpm[rownames(HQST_vs_HQE_top50$table),]
pheatmap(subset(HQST_vs_HQE_top50_log2_cpm,select=c(HQ_ST1, HQ_ST1, HQE1, HQE2)),
color=colorRampPalette(c("navy", "lavender", "maroon"))(15),fontsize_row=4)
PWE_vs_PWST <- glmQLFTest(fit, contrast = my_contrasts[,"PWE_vs_PWST"])
PWE_vs_PWST_top50 = topTags(PWE_vs_PWST,adjust.method = "BH", p.value = 0.05, n = 50)
PWE_vs_PWST_all = topTags(PWE_vs_PWST,adjust.method = "BH", p.value = 0.05, n = nrow(PWE_vs_PWST$table))
PWE_vs_PWST_DEG <- PWE_vs_PWST_all[abs(PWE_vs_PWST_all$table$logFC) > 1, ]
PWE_vs_PWST_up <- PWE_vs_PWST_all[PWE_vs_PWST_all$table$logFC > 1, ]
PWE_vs_PWST_down <- PWE_vs_PWST_all[PWE_vs_PWST_all$table$logFC < -1, ]
head(PWE_vs_PWST_top50, n=10)
## Coefficient: -1*PW_ST 1*PWE
## logFC logCPM F PValue FDR
## Phatr3_J23830 -6.170073 8.103721 1468.9586 6.958730e-11 1.449607e-07
## Phatr3_J46597 -7.421010 9.152332 1204.6116 1.619890e-10 1.449607e-07
## Phatr3_J10068 7.728042 6.486422 1155.1607 1.936245e-10 1.449607e-07
## Phatr3_J47104 6.502905 6.792173 1145.7683 2.004689e-10 1.449607e-07
## Phatr3_J32747 7.098911 8.734087 1110.3420 2.291282e-10 1.449607e-07
## Phatr3_J48834 -5.807797 6.469906 1070.6144 2.675374e-10 1.449607e-07
## Phatr3_J47667 -9.408787 13.420073 1016.6111 3.333997e-10 1.503106e-07
## Phatr3_J40433 -6.822467 14.655511 977.0339 3.947017e-10 1.503106e-07
## Phatr3_EG00065 -6.173506 9.136149 953.6820 4.374371e-10 1.503106e-07
## Phatr3_J25308 7.803495 9.255371 941.3297 4.623518e-10 1.503106e-07
Determine how many genes are up and down regulated for each pairwise comparison
For differentially expressed genes set logFC and pvalue threshold to 0.05 and 1
lfc=1 sets a 2-fold change minimum
is.de <- decideTestsDGE(PWE_vs_PWST,adjust.method="BH",p.value=0.05,lfc=1)
summary(is.de)
## -1*PW_ST 1*PWE
## Down 785
## NotSig 1776
## Up 690
plotMD(PWE_vs_PWST, status=is.de, values=c(1,-1), col=c("maroon","cadetblue3"),
legend="topright", cex = .5, main = "MD Plot : PW E vs PW St")
Volcano Plot
volcanoData <- cbind(PWE_vs_PWST_all$table$logFC, -log10(PWE_vs_PWST_all$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
DEGs <- PWE_vs_PWST_all$table$FDR < 0.05 & abs(PWE_vs_PWST_all$table$logFC) > 1
point.col <- ifelse(DEGs, "red", "black")
plot(volcanoData, pch=19, cex = .5, col = point.col, main = "Volcano Plot : PW E vs PW St")
PWE_vs_PWST_top50_log2_cpm <- logcpm[rownames(PWE_vs_PWST_top50$table),]
pheatmap(subset(PWE_vs_PWST_top50_log2_cpm,select=c(PWE1, PWE2, PW_ST1, PW_ST1)),
color=colorRampPalette(c("navy", "lavender", "maroon"))(15),fontsize_row=4)
Groups the normal media and PW and contrasts samples by their growth stage only
group<-c("exp","exp","exp","exp","st","st","exp","exp","st","st")
dge2<-DGEList(counts=star_data,group=group) #creates a DGE list object
dim(dge2)
## [1] 12392 10
full_dge<-dge2 #store original data just in case
# filtering & normalizing data
#keep only 100 counts per mil in at least 2 samples
head(cpm(dge2))
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2
## Phatr3_J31400 0.05087787 0.2236625 0.1336435 0.4334001 0.00000 0.2161654
## Phatr3_J42422 7.32641286 7.6939896 9.5332341 9.4728883 26.56036 24.3726541
## Phatr3_J31402 0.00000000 0.0000000 0.0000000 0.0000000 0.00000 0.0000000
## Phatr3_J42423 1.37370241 1.9682299 3.4301824 2.6004007 36.80130 27.4530117
## Phatr3_J42424 92.54684020 97.1589854 83.5717162 77.6405351 100.69153 99.4901468
## Phatr3_J7430 23.14942952 22.8135739 27.6196503 27.4280359 30.85495 27.7232185
## PWE1 PWE2 PW_ST1 PW_ST2
## Phatr3_J31400 0.1070611 0.05289409 0.5624836 0.910445
## Phatr3_J42422 6.2630738 5.34230276 11.4184165 11.517130
## Phatr3_J31402 0.0000000 0.00000000 0.0000000 0.000000
## Phatr3_J42423 2.4624051 2.06286938 6.1873193 13.838764
## Phatr3_J42424 81.2593682 79.76428279 58.2170497 71.105757
## Phatr3_J7430 17.8256717 24.11970355 18.9556964 16.570100
apply(dge2$counts, 2, sum)
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2
## 19654912 22355112 22447786 16151357 15135335 18504345 18680923 18905705
## PW_ST1 PW_ST2
## 17778297 21967279
keep2 <- rowSums(cpm(dge2)>100) >=2
dge2 <- dge2[keep2,]
dim(dge2) #check number of genes left after filtering
## [1] 3251 10
#resetting the library size
dge2$samples$lib.size <- colSums(dge2$counts)
dge2$samples
#now we can normalize the data
dge_norm2=calcNormFactors(dge2, method="TMM")
dge_norm2
## An object of class "DGEList"
## $counts
## HQ10E1 HQ10E2 HQE1 HQE2 HQ_ST1 HQ_ST2 PWE1 PWE2 PW_ST1 PW_ST2
## Phatr3_J42426 8520 9594 8689 7289 5396 7052 6769 7244 5813 8494
## Phatr3_EG02408 3806 4483 3948 2637 224 238 3580 3084 216 368
## Phatr3_J31409 6448 7590 9257 5980 1008 1394 4863 4622 1381 1864
## Phatr3_J42429 1747 1981 2134 1377 2150 2386 1650 1699 1427 1398
## Phatr3_J4937 6608 7884 7049 4583 3737 5446 6837 6271 2997 3700
## 3246 more rows ...
##
## $samples
## group lib.size norm.factors
## HQ10E1 exp 15289361 1.0347646
## HQ10E2 exp 17319367 1.0442405
## HQE1 exp 17111487 1.0884649
## HQE2 exp 12568283 1.0460172
## HQ_ST1 st 11248709 1.0977999
## HQ_ST2 st 13869364 1.0696829
## PWE1 exp 14606604 1.0251548
## PWE2 exp 14925823 1.0196206
## PW_ST1 st 14619994 0.8029116
## PW_ST2 st 17756624 0.8247657
# create design matrix
design.mat2 <- model.matrix(~ 0 + dge_norm2$samples$group)
colnames(design.mat2) <- levels(dge_norm2$samples$group)
#estimate the dispersion
d3 <- estimateGLMCommonDisp(dge_norm2,design.mat2)
d3 <- estimateGLMTrendedDisp(d3,design.mat2, method="auto")
# You can change method to "auto", "bin.spline", "power", "spline", "bin.loess".
# The default is "auto" which chooses "bin.spline" when > 200 tags and "power" otherwise.
d3 <- estimateGLMTagwiseDisp(d3,design.mat2)
plotBCV(d3)
#create a matrix of contrasts for anova-like testing
my_contrasts2<-makeContrasts(
#exponential vs stationary
exp_vs_st = exp-st,
levels= design.mat2
)
my_contrasts2
## Contrasts
## Levels exp_vs_st
## exp 1
## st -1
fit2 <- glmQLFit(d3, design.mat2)
E_vs_ST <- glmQLFTest(fit2, contrast = my_contrasts2)
E_vs_ST_top100 = topTags(E_vs_ST,adjust.method = "BH", p.value = 0.05, n = 100)
E_vs_ST_all = topTags(E_vs_ST,adjust.method = "BH", p.value = 0.05, n = nrow(E_vs_ST$table))
E_vs_ST_DEG <- E_vs_ST_all[abs(E_vs_ST_all$table$logFC) > 1, ]
E_vs_ST_up <- E_vs_ST_all[E_vs_ST_all$table$logFC > 1, ]
E_vs_ST_down <- E_vs_ST_all[E_vs_ST_all$table$logFC < -1, ]
head(E_vs_ST_top100, n=10)
## Coefficient: 1*exp -1*st
## logFC logCPM F PValue FDR
## Phatr3_J45757 -9.563363 10.000099 1354.2665 1.684748e-12 5.477116e-09
## Phatr3_J23717 5.472037 9.748688 885.9707 1.554619e-11 2.527033e-08
## Phatr3_J47612 -6.769131 10.785967 716.4619 4.710233e-11 3.908231e-08
## Phatr3_J36381 -5.753589 6.668757 713.6253 4.808651e-11 3.908231e-08
## Phatr3_J47006 4.918942 8.298303 673.7905 6.486447e-11 4.217488e-08
## Phatr3_J48511 -4.332105 9.654110 560.0490 1.696963e-10 9.194711e-08
## Phatr3_J12411 3.673927 6.993582 536.4989 2.120974e-10 9.850411e-08
## Phatr3_J44651 3.881203 7.976553 520.8250 2.473700e-10 1.005250e-07
## Phatr3_J49907 -3.531604 7.214636 484.8683 3.584177e-10 1.179003e-07
## Phatr3_EG02408 3.525080 7.151882 480.6009 3.752105e-10 1.179003e-07
Determine how many genes are up and down regulated for each pairwise comparison
For differentially expressed genes set logFC and pvalue threshold to 0.05 and 1
lfc=1 sets a 2-fold change minimum
is.de <- decideTestsDGE(E_vs_ST,adjust.method="BH",p.value=0.05,lfc=1)
summary(is.de)
## 1*exp -1*st
## Down 771
## NotSig 1792
## Up 688
plotMD(E_vs_ST, status=is.de, values=c(1,-1), col=c("maroon","cadetblue3"),
legend="topright", cex = .5, main = "MD Plot : Exponential vs Stationary Growth")
Volcano Plot
volcanoData <- cbind(E_vs_ST_all$table$logFC, -log10(E_vs_ST_all$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
DEGs <- E_vs_ST_all$table$FDR < 0.05 & abs(E_vs_ST_all$table$logFC) > 1
point.col <- ifelse(DEGs, "red", "black")
plot(volcanoData, pch=19, cex = .5, col = point.col, main = "Volcano Plot : Exponential vs Stationary Growth")
E_vs_ST_top100_log2_cpm <- logcpm[rownames(E_vs_ST_top100$table),]
pheatmap(E_vs_ST_top100_log2_cpm,color=colorRampPalette(c("navy", "lavender", "maroon"))(15),fontsize_row=2.5)
common_HQ_vs_PW_DEGs <- intersect(row.names(HQst_vs_PWst_DEG$table), row.names(HQE_vs_PWE_DEG$table))
common_HQ_vs_PW_DEGs <- data.frame(common_HQ_vs_PW_DEGs)
library(openxlsx)
list_of_datasets <- list("HQst_vs_PWst" = HQst_vs_PWst_DEG$table,
"HQE_vs_PWE" = HQE_vs_PWE_DEG$table,
"HQST_vs_HQE" = HQST_vs_HQE_DEG$table,
"PWE_vs_PWST" = PWE_vs_PWST_DEG$table,
"E_vs_ST" = E_vs_ST_DEG$table,
"HQ_vs_PW" = common_HQ_vs_PW_DEGs
)
write.xlsx(list_of_datasets, file = "PW_DEGs.xlsx", row.names = TRUE)
Perform gene ontology of the common DEGs between HQst_vs_PWst and HQE_vs_PWE with gprofiler2
library(gprofiler2)
HQst_vs_PWst_over_rep <- gost(query = list("HQst vs PWst" = row.names(HQst_vs_PWst_DEG$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = FALSE,
evcodes = TRUE)
HQE_vs_PWE_over_rep <- gost(query = list("HQE vs PWE" = row.names(HQE_vs_PWE_DEG$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = FALSE,
evcodes = TRUE)
HQst_vs_PWst_under_rep <- gost(query = list("HQst vs PWst" = row.names(HQst_vs_PWst_DEG$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = TRUE,
evcodes = TRUE)
HQE_vs_PWE_under_rep <- gost(query = list("HQE vs PWE" = row.names(HQE_vs_PWE_DEG$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = TRUE,
evcodes = TRUE)
Visualize the over-represented GO terms of HQst vs PWst
gostplot(HQst_vs_PWst_over_rep, capped = TRUE, interactive = TRUE)
Top 20 GO terms of over-represented DEGs in HQst vs PWst by lowest p-values
publish_gosttable(HQst_vs_PWst_over_rep,
highlight_terms = HQst_vs_PWst_over_rep$result[order(HQst_vs_PWst_over_rep$result$p_value),][1:20,])
Visualize the under-represented GO terms of HQst vs PWst
gostplot(HQst_vs_PWst_under_rep, capped = TRUE, interactive = TRUE)
Top 20 GO terms of under-represented DEGs in HQst vs PWst by lowest p-values
publish_gosttable(HQst_vs_PWst_under_rep,
highlight_terms = HQst_vs_PWst_under_rep$result[order(HQst_vs_PWst_under_rep$result$p_value),][1:20,])
Visualize the over-represented GO terms of HQE vs PWE
gostplot(HQE_vs_PWE_over_rep, capped = TRUE, interactive = TRUE)
Top 20 GO terms of over-represented DEGs in HQE vs PWE by lowest p-values
publish_gosttable(HQE_vs_PWE_over_rep,
highlight_terms = HQE_vs_PWE_over_rep$result[order(HQE_vs_PWE_over_rep$result$p_value),][1:20,]
)
Visualize the under-represented GO terms of HQE vs PWE
gostplot(HQE_vs_PWE_under_rep, capped = TRUE, interactive = TRUE)
Top 20 GO terms of under-represented DEGs in HQE vs PWE by lowest p-values
publish_gosttable(HQE_vs_PWE_under_rep,
highlight_terms = HQE_vs_PWE_under_rep$result[order(HQE_vs_PWE_under_rep$result$p_value),][1:20,]
)
common_HQ_vs_PW_over_rep <- gost(query = list("Common Over-rep DEGs: HQ vs PW" = common_HQ_vs_PW_DEGs[,1]),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = FALSE,
evcodes = TRUE)
common_HQ_vs_PW_under_rep <- gost(query = list("Common Under-rep DEGs: HQ vs PW" = common_HQ_vs_PW_DEGs[,1]),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = TRUE,
evcodes = TRUE)
gostplot(common_HQ_vs_PW_over_rep, capped = TRUE, interactive = TRUE)
publish_gosttable(common_HQ_vs_PW_over_rep,
highlight_terms = common_HQ_vs_PW_over_rep$result[order(common_HQ_vs_PW_over_rep$result$p_value),][1:20,])
gostplot(common_HQ_vs_PW_under_rep, capped = TRUE, interactive = TRUE)
publish_gosttable(common_HQ_vs_PW_under_rep,
highlight_terms = common_HQ_vs_PW_under_rep$result[order(common_HQ_vs_PW_under_rep$result$p_value),][1:20,])
Write gene ontology results to an Excel file
columns <- c("term_id",
"source",
"term_name",
"p_value",
"effective_domain_size",
"intersection_size",
"intersection")
GO_results <- list("HQst_vs_PWst_over_rep" = HQst_vs_PWst_over_rep$result[, columns],
"HQst_vs_PWst_under_rep" = HQst_vs_PWst_under_rep$result[, columns],
"HQE_vs_PWE_over_rep" = HQE_vs_PWE_over_rep$result[, columns],
"HQE_vs_PWE_under_rep" = HQE_vs_PWE_under_rep$result[, columns],
"common_HQ_vs_PW_over_rep" = common_HQ_vs_PW_over_rep$result[, columns],
"common_HQ_vs_PW_under_rep" = common_HQ_vs_PW_under_rep$result[, columns]
)
write.xlsx(GO_results, file = "PW_GO_results.xlsx", row.names = TRUE)
HQst_vs_PWst_up_reg_over_rep <- gost(query = list("HQst vs PWst Up-regulated" = row.names(HQst_vs_PWst_up$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = FALSE,
evcodes = TRUE)
Over-represented GO terms of Up-regulated DEGs in HQst vs PWst
gostplot(HQst_vs_PWst_up_reg_over_rep, capped = TRUE, interactive = TRUE)
publish_gosttable(HQst_vs_PWst_up_reg_over_rep,
highlight_terms = HQst_vs_PWst_up_reg_over_rep$result[order(HQst_vs_PWst_up_reg_over_rep$result$p_value),][1:20,])
Over-represented GO terms of Down-regulated DEGs in HQst vs PWst
HQst_vs_PWst_down_reg_over_rep <- gost(query = list("HQst vs PWst Down-regulated" = row.names(HQst_vs_PWst_down$table)),
organism = "ptricornutum",
ordered_query = TRUE,
measure_underrepresentation = FALSE,
evcodes = TRUE)
gostplot(HQst_vs_PWst_down_reg_over_rep, capped = TRUE, interactive = TRUE)
publish_gosttable(HQst_vs_PWst_down_reg_over_rep,
highlight_terms = HQst_vs_PWst_down_reg_over_rep$result[order(HQst_vs_PWst_down_reg_over_rep$result$p_value),][1:20,])
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gprofiler2_0.2.0 openxlsx_4.2.3 pheatmap_1.0.12 edgeR_3.32.1
## [5] limma_3.46.0 readr_1.4.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.6 locfit_1.5-9.4 lattice_0.20-41 tidyr_1.1.3
## [5] assertthat_0.2.1 digest_0.6.27 utf8_1.2.1 mime_0.10
## [9] R6_2.5.0 evaluate_0.14 httr_1.4.2 ggplot2_3.3.3
## [13] highr_0.9 pillar_1.6.0 rlang_0.4.10 lazyeval_0.2.2
## [17] curl_4.3 rstudioapi_0.13 data.table_1.14.0 jquerylib_0.1.4
## [21] rmarkdown_2.8 splines_4.0.4 stringr_1.4.0 htmlwidgets_1.5.3
## [25] RCurl_1.98-1.3 munsell_0.5.0 shiny_1.6.0 compiler_4.0.4
## [29] httpuv_1.6.1 xfun_0.22 pkgconfig_2.0.3 htmltools_0.5.1.1
## [33] tidyselect_1.1.1 gridExtra_2.3 tibble_3.1.0 fansi_0.4.2
## [37] viridisLite_0.4.0 later_1.2.0 crayon_1.4.1 dplyr_1.0.5
## [41] bitops_1.0-7 grid_4.0.4 xtable_1.8-4 jsonlite_1.7.2
## [45] gtable_0.3.0 lifecycle_1.0.0 DBI_1.1.1 magrittr_2.0.1
## [49] scales_1.1.1 zip_2.1.1 cli_2.5.0 stringi_1.5.3
## [53] promises_1.2.0.1 bslib_0.2.5 ellipsis_0.3.1 generics_0.1.0
## [57] vctrs_0.3.7 RColorBrewer_1.1-2 tools_4.0.4 glue_1.4.2
## [61] purrr_0.3.4 hms_1.0.0 crosstalk_1.1.1 fastmap_1.1.0
## [65] yaml_2.2.1 colorspace_2.0-0 plotly_4.9.3 knitr_1.33
## [69] sass_0.4.0